# -*- coding: utf-8 -*-
import copy
import random

import scrapy
from scrapy.exceptions import IgnoreRequest
from scrapy.utils.project import get_project_settings

from zc_core.spiders.base import BaseSpider
from zc_core.dao.item_data_dao import ItemDataDao
from zc_core.util.batch_gen import time_to_batch_no
from zc_core.util.http_util import retry_request
from zc_core.dao.batch_dao import BatchDao
from zc_core.util.done_filter import DoneFilter
from jesgcc.utils.login import SeleniumLogin
from jesgcc.rules import *


class FullSpider(BaseSpider):
    """Back-fill missing fields for items already stored in the current batch.

    ``start_requests`` drives two passes:

    * items whose ``catalog1Id`` is unset have their detail page fetched and
      handed to ``parse_sku_catalog``;
    * items whose ``supplierSkuCode`` is unset go through the request chain
      brand -> price -> supplier goods info.

    NOTE(review): the module-level ``parse_*`` functions and ``Box`` used by
    the callbacks are not defined in this file; presumably they are provided
    by ``from jesgcc.rules import *`` -- confirm.
    """
    name = "full"

    # Brand endpoint
    brand_url = 'http://j.esgcc.com.cn/product/goodsBody'
    # Mall price endpoint
    price_url = 'http://j.esgcc.com.cn/product/goodsSpecs'
    # Supplier goods endpoint
    sp_sku_url = 'http://j.esgcc.com.cn/product/goodsinfo?goodsId={}'
    # Detail page, used to fill in missing category data
    item_url = 'http://j.esgcc.com.cn/product/{}.html/?grade=1'

    def __init__(self, batchNo=None, *args, **kwargs):
        """Initialise the spider, create the batch record and the done filter.

        :param batchNo: batch number forwarded to ``BaseSpider``.
        """
        super(FullSpider, self).__init__(batchNo=batchNo, *args, **kwargs)
        # Create the batch record
        BatchDao().create_batch(self.batch_no)
        # Intended to avoid collecting the same item twice
        self.done_filter = DoneFilter(self.batch_no)

    def start_requests(self):
        """Log in, then yield the category back-fill and detail back-fill requests.

        Nothing is yielded when no login cookies could be obtained.
        """
        self.cookies = SeleniumLogin().get_cookies()
        if not self.cookies:
            self.logger.error('init cookie failed...')
            return
        self.logger.info('init cookie: %s', self.cookies)

        for request in self._catalog_requests():
            yield request
        for request in self._detail_requests():
            yield request

    def _catalog_requests(self):
        """Yield one detail-page GET per batch item whose ``catalog1Id`` is unset."""
        sku_cat_list = ItemDataDao().get_batch_data_list(
            self.batch_no,
            fields={'_id': 1, 'catalog3Id': 1},
            query={'catalog1Id': None})
        self.logger.info('商品分类补全：%s', len(sku_cat_list))
        random.shuffle(sku_cat_list)
        for sku_cat in sku_cat_list:
            sku_id = sku_cat.get("_id")

            yield scrapy.Request(
                url=self.item_url.format(sku_id),
                method="GET",
                meta={
                    'reqType': 'detail',
                    'batchNo': self.batch_no,
                    'skuId': sku_id
                },
                headers={
                    'Host': 'j.esgcc.com.cn',
                    'Connection': 'keep-alive',
                    'Cache-Control': 'max-age=0',
                    'Upgrade-Insecure-Requests': '1',
                    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
                    'Accept-Encoding': 'gzip, deflate',
                    'Accept-Language': 'zh-CN,zh;q=0.9',
                    'Referer': 'http://j.esgcc.com.cn/indexView',
                    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.131 Safari/537.36',
                },
                cookies=self.cookies,
                callback=self.parse_sku_catalog,
                errback=self.error_back
            )

    def _detail_requests(self):
        """Yield one brand POST per batch item whose ``supplierSkuCode`` is unset.

        Items whose ``offlineTime`` exceeds the ``MAX_OFFLINE_TIME`` setting
        (default 2) are skipped.
        """
        # Read the threshold once; getint also copes with string overrides.
        max_offline_time = get_project_settings().getint('MAX_OFFLINE_TIME', 2)

        # 'offlineTime' must be requested as well, otherwise the filter below
        # can never see it (assumes ``fields`` is a projection -- TODO confirm).
        sku_list = ItemDataDao().get_batch_data_list(
            self.batch_no,
            fields={'_id': 1, 'supplierName': 1, 'offlineTime': 1},
            query={'supplierSkuCode': None})
        self.logger.info('补充商品详情：%s', len(sku_list))
        random.shuffle(sku_list)
        for sku in sku_list:
            sku_id = sku.get("_id")
            sp_name = sku.get("supplierName")
            # Avoid pointless requests; a missing or None value counts as 0.
            offline_time = sku.get('offlineTime') or 0
            if offline_time > max_offline_time:
                self.logger.info('忽略: [%s][%s]', sku_id, offline_time)
                continue

            yield scrapy.FormRequest(
                url=self.brand_url,
                method="POST",
                meta={
                    'reqType': 'full',
                    'batchNo': self.batch_no,
                    'skuId': sku_id,
                    'supplierName': sp_name
                },
                headers={
                    'Host': 'j.esgcc.com.cn',
                    'Connection': 'keep-alive',
                    'Accept': '*/*',
                    'Origin': 'http://j.esgcc.com.cn',
                    'X-Requested-With': 'XMLHttpRequest',
                    'Referer': 'http://j.esgcc.com.cn/product/{}.html/?grade=1'.format(sku_id),
                    'Content-Type': 'application/x-www-form-urlencoded',
                    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.131 Safari/537.36',
                    'Accept-Encoding': 'gzip, deflate',
                    'Accept-Language': 'zh-CN,zh;q=0.9',
                },
                formdata={
                    'goodsId': sku_id
                },
                cookies=self.cookies,
                callback=self.parse_item_brand,
                errback=self.error_back
            )

    def _xhr_headers(self, sku_id):
        """Return the AJAX headers shared by the price and supplier-goods requests."""
        return {
            'Host': 'j.esgcc.com.cn',
            'Connection': 'keep-alive',
            'Accept': '*/*',
            'X-Requested-With': 'XMLHttpRequest',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.141 Safari/537.36',
            'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
            'Origin': 'http://j.esgcc.com.cn',
            'Referer': 'http://j.esgcc.com.cn/product/{}.html/?grade=1'.format(sku_id),
            'Accept-Encoding': 'gzip, deflate',
            'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,ja;q=0.7',
        }

    def parse_item_brand(self, response):
        """Handle the brand response and, if it parsed, request the price.

        The parsed item travels to the next callback in ``meta['item']``.
        An error is logged when the module-level parser returns nothing.
        """
        sku_id = response.meta.get("skuId")
        # Inside a method this name resolves to the module-level parser,
        # not to this method.
        item = parse_item_brand(response)
        if not item:
            self.logger.error('无品牌: sku=%s', sku_id)
            return

        self.logger.info('商品: [%s]', sku_id)
        yield scrapy.FormRequest(
            url=self.price_url,
            method="POST",
            meta={
                'reqType': 'full',
                'batchNo': self.batch_no,
                'skuId': sku_id,
                'item': item
            },
            headers=self._xhr_headers(sku_id),
            formdata={
                'goodsId': sku_id
            },
            cookies=self.cookies,
            callback=self.parse_item_price,
            errback=self.error_back
        )

    def parse_item_price(self, response):
        """Handle the price response and, if it parsed, request supplier goods info."""
        sku_id = response.meta.get("skuId")
        item_supply = parse_item_price(response)
        if not item_supply:
            return

        # Request the supplier goods URL. This is a plain GET without a form
        # body, so a regular Request is used.
        yield scrapy.Request(
            url=self.sp_sku_url.format(sku_id),
            method="GET",
            meta={
                'reqType': 'price',
                'batchNo': self.batch_no,
                'skuId': sku_id,
                'item': item_supply
            },
            headers=self._xhr_headers(sku_id),
            cookies=self.cookies,
            callback=self.parse_item_sp_sku_url,
            errback=self.error_back
        )

    def parse_item_sp_sku_url(self, response):
        """Yield the item parsed from the supplier-goods response, if any."""
        if not response:
            return
        item_supply = parse_item_sp_sku_url(response)
        if item_supply:
            yield item_supply

    def parse_sku_catalog(self, response):
        """Yield the item and the category box parsed from a detail page.

        The module-level parser is expected to return an ``(item, cats)``
        pair; each half is emitted only when it is non-empty.
        """
        if not response:
            return
        item_supply, cats = parse_sku_catalog(response)
        if item_supply:
            yield item_supply
        if cats:
            yield Box('catalog', self.batch_no, cats)


